from ma.commons.core.constants import MAP, REDUCE
from ma.fs.dfs.dfsflags import *
import ma.const
import ma.log


class TaskScheduler(object):

    """Scheduler that tells a node tracker (NT) which job to take a task from,
    and whether that task should be a map or a reduce.

    Jobs are visited round robin in the iteration order of the internal job
    dict. For each job, maps are suggested until the job's schedule ratio is
    reached (or its map phase is marked complete); then a reduce is suggested
    and the per-job map counter is reset.

    Per-job state is a three element list stored under the job id:
        [_RATIO]          maps to suggest before a reduce is suggested
        [_MAPS_SCHEDULED] maps suggested since the last reduce
        [_MAPS_DONE]      True once the job's map phase has been marked complete
    """

    # indexes into the per-job state list
    _RATIO = 0
    _MAPS_SCHEDULED = 1
    _MAPS_DONE = 2

    def __init__(self, list_of_jobs, nodetracker):

        """Build the job dict for the given jobs and point the round-robin
        cursor at the first of them.

        list_of_jobs -- iterable of job ids to register; may be empty, in
                        which case the cursor (job_id / job_data) stays None
                        until a job is registered and scheduleTasks is called
        nodetracker  -- object whose localizeJob(job_id) is called for every
                        job that gets registered
        """
        self.__log = ma.log.get_logger("ma.mrimprov")

        self.nodetracker = nodetracker
        #ideally this function should build up its jobs list from the HDFS
        #TO DO CODE build up
        self.__jobs_scheduler_dict = {}

        #round-robin cursor: id and state list of the job last scheduled from
        self.job_id = None
        self.job_data = None
        #no longer used internally; kept so existing attribute access does not break
        self.iterator = None

        self.updateNewJobs(list_of_jobs)
        self._resetCursorToFirstJob()

        #self.dfsflags = DfsFlags(self.hdfs)


    def _resetCursorToFirstJob(self):

        """Point the cursor at the first job in the dict.

        Returns True when a job was selected. When the dict is empty the
        cursor is cleared (job_id and job_data set to None) and False is
        returned.
        """
        if not self.__jobs_scheduler_dict:
            self.job_id = None
            self.job_data = None
            return False

        self.job_id = next(iter(self.__jobs_scheduler_dict))
        self.job_data = self.__jobs_scheduler_dict[self.job_id]
        return True


    def addJobToDict(self, job_id, maps_to_red_ratio):

        """Register a job (or reset an already registered one) in the job dict.

        The stored ratio is maps_to_red_ratio scaled by the configured
        schedule multiply factor; the map counter starts at 0 and the map
        phase is marked as not complete.
        """
        schedule_multiply_factor = ma.const.XmlData.get_int_data(ma.const.xml_schedule_multiply_factor)
        print("Multiply factor:", schedule_multiply_factor)
        #state list layout: [scaled ratio, no. of maps scheduled, all maps complete?]
        self.__jobs_scheduler_dict[job_id] = [maps_to_red_ratio * schedule_multiply_factor, 0, False]


    def markMapsComplete(self, job_id):

        """Mark the map phase of job_id as complete so that only reduces are
        suggested for it from now on. Unknown job ids are ignored.
        """
        if job_id in self.__jobs_scheduler_dict:
            self.__jobs_scheduler_dict[job_id][self._MAPS_DONE] = True
            self.__log.info('---- Maps Completed for job %d', job_id)


    def removeInactiveJobsFromDict(self, inactive_jobs):

        """Remove completed or killed jobs from the job dict.

        Ids that are not in the dict are skipped. If the job the cursor points
        at is among the removed ones, the cursor moves to the first remaining
        job, or is cleared when no job is left.
        """
        for job_id in inactive_jobs:
            #pop instead of del so that an already removed job is not an error
            self.__jobs_scheduler_dict.pop(job_id, None)

        if self.job_id in inactive_jobs:
            if self._resetCursorToFirstJob():
                print("iterator moved cause pointed to job was inactive")
            else:
                self.__log.info('No jobs left to schedule from; scheduler cursor cleared')


    def refreshJobsInfo(self, active_jobs):

        """Synchronise the job dict with active_jobs.

        Jobs in active_jobs that are not yet known are registered; known jobs
        that are missing from active_jobs are removed. The removed job ids are
        returned so the NT can do housekeeping for tasks of those jobs.
        """
        #set for the membership test below; ids are dict keys and therefore hashable
        active_ids = set(active_jobs)

        #active but not yet known to the scheduler
        new_jobs = [job_id for job_id in active_jobs
                    if job_id not in self.__jobs_scheduler_dict]
        #known to the scheduler but no longer active
        inactive_jobs = [job_id for job_id in self.__jobs_scheduler_dict
                         if job_id not in active_ids]

        self.__log.info('Current job dict: %s, New jobs: %s, Inactive jobs: %s', str(self.__jobs_scheduler_dict), str(new_jobs), str(inactive_jobs))

        #new jobs are added before inactive ones are removed, so the cursor
        #can move onto a new job if the current one just became inactive
        self.updateNewJobs(new_jobs)
        self.removeInactiveJobsFromDict(inactive_jobs)
        return inactive_jobs


    def updateNewJobs(self, new_jobs):
        """Register every job in new_jobs.

        For each job the map-to-reduce schedule ratio is read from the job's
        xml data, the job is added to the job dict, and the node tracker is
        asked to localize it.
        """
        for job_id in new_jobs:
            maps_to_reduces_ratio = ma.const.JobsXmlData.get_int_data(ma.const.xml_map_to_reduce_schedule_ratio, job_id)
            print("Map to reduces schedule ratio:", maps_to_reduces_ratio)
            self.addJobToDict(job_id, maps_to_reduces_ratio)
            self.nodetracker.localizeJob(job_id)


    def scheduleTasks(self):

        """Advance the round-robin cursor to the next job and suggest a task
        type for it.

        Returns (job_id, MAP) while the job's map phase is not complete and
        either brickwall mode is set for the job or fewer maps than its ratio
        have been suggested since the last reduce. Otherwise returns
        (job_id, REDUCE) and resets the job's map counter. Returns None when
        no jobs are registered.
        """
        self.__log.info('Fetching list of maps or reduces for scheduling task')

        job_ids = list(self.__jobs_scheduler_dict)
        if not job_ids:
            self.job_id = None
            self.job_data = None
            return None

        #step to the job after the one last scheduled from, wrapping to the
        #head of the dict; if the last job is unknown (e.g. the scheduler was
        #empty until now) start from the head
        try:
            last_position = job_ids.index(self.job_id)
        except ValueError:
            last_position = -1
        self.job_id = job_ids[(last_position + 1) % len(job_ids)]
        self.job_data = self.__jobs_scheduler_dict[self.job_id]

        # if brickwall needed
        if ma.const.JobsXmlData.get_int_data(ma.const.xml_brickwall_mrplus, self.job_id) == 1:
            self.__log.info("Scheduler forcing brickwall!")
            #brickwall: keep suggesting maps until the map phase is complete
            schedule_ratio_condition = True
        else:
            schedule_ratio_condition = self.job_data[self._MAPS_SCHEDULED] < self.job_data[self._RATIO]

        #map phase not complete and ratio not yet reached: suggest a map
        if not self.job_data[self._MAPS_DONE] and schedule_ratio_condition:
            self.job_data[self._MAPS_SCHEDULED] += 1
            return self.job_id, MAP

        #map phase complete or ratio reached: suggest a reduce and start
        #counting maps for this job afresh
        self.job_data[self._MAPS_SCHEDULED] = 0
        return self.job_id, REDUCE
